NCORES = 2
registerDoParallel(NCORES)
register(DoparParam())
ncells = 1000
add = '_ziadd0.67'
fileName = sprintf('simLun_%s%s', ncells, add)
load(paste0(fileName, '.rda'))
counts = simData[[1]]$counts
counts = counts[rowSums(counts) != 0, ]
Methods
ZINB
zinb = zinbFit(counts, K = 2, commondispersion = FALSE)
W = getW(zinb)
PCA
library(rARPACK)
fastpca <- function(expr, K = 2, scale=FALSE) {
svd_raw <- svds(scale(t(expr), center=TRUE, scale=scale), k=K, nu=K, nv=0)
pc_raw <- svd_raw$u %*% diag(svd_raw$d[1:K])
return(pc_raw)
}
totalcount = function (ei)
{
sums = colSums(ei)
eo = t(t(ei)*mean(sums)/sums)
return(eo)
}
tc <- totalcount(counts)
tmm <- TMM_FN(counts)
fq <- FQT_FN(counts)
pca_raw <- fastpca(log1p(t(counts)))
pca_tc <- fastpca(log1p(tc))
pca_tmm <- fastpca(log1p(tmm))
pca_fq <- fastpca(t(log1p(fq)))
ZIFA
wrapRzifa <- function(Y, block = TRUE, k=2){
# wrapper R function for ZIFA.
# md5 hashing and temporary files are used not to re-run zifa
# if it has already be run on this computer.
d = digest(Y, "md5")
tmp = paste0(tempdir(), '/', d)
write.csv(Y, paste0(tmp, '.csv'))
if (!file.exists(paste0(tmp, "_", k, '_zifa.csv'))){
print('run ZIFA')
bb = ifelse(block, '-b ', '')
cmd = sprintf('python ../run_zifa.py -d %d %s%s.csv %s_%d_zifa.csv', k, bb, tmp, tmp, k)
system(cmd)
}
read.csv(sprintf("%s_%d_zifa.csv", tmp, k), header=FALSE)
}
zifa_raw <- wrapRzifa(log1p(counts))
## [1] "run ZIFA"
zifa_tc <- wrapRzifa(log1p(tc))
## [1] "run ZIFA"
zifa_tmm <- wrapRzifa(log1p(tmm))
## [1] "run ZIFA"
zifa_fq <- wrapRzifa(log1p(fq))
## [1] "run ZIFA"
Clustering
res <- list('ZINB-WaVE_K=2' = W,
'PCA_RAW' = pca_raw, 'PCA_TC' = pca_tc,
'PCA_TMM' = pca_tmm, 'PCA_FQ' = pca_fq,
'ZIFA_RAW' = zifa_raw, 'ZIFA_TC' = zifa_tc,
'ZIFA_TMM' = zifa_tmm, 'ZIFA_FQ' = zifa_fq)
cl <- lapply(1:length(res), function(i){
km = kmeans(res[[i]], centers = 3, iter.max = 1000, nstart = 1000)
estClust = km$cluster
par(mfrow=c(1,2))
plot(res[[i]], col = labels, main = paste(names(res)[i], '\nTrue Labels'))
plot(res[[i]], col = estClust, main = paste(names(res)[i], '\nFound Labels'))
par(mfrow=c(1,1))
estClust
})









names(cl) = names(res)
lapply(cl, table, labels)
## $`ZINB-WaVE_K=2`
## labels
## 1 2 3
## 1 333 0 0
## 2 0 333 0
## 3 0 0 334
##
## $PCA_RAW
## labels
## 1 2 3
## 1 140 152 149
## 2 123 106 117
## 3 70 75 68
##
## $PCA_TC
## labels
## 1 2 3
## 1 94 114 86
## 2 138 125 160
## 3 101 94 88
##
## $PCA_TMM
## labels
## 1 2 3
## 1 131 126 145
## 2 101 94 90
## 3 101 113 99
##
## $PCA_FQ
## labels
## 1 2 3
## 1 135 148 143
## 2 128 110 121
## 3 70 75 70
##
## $ZIFA_RAW
## labels
## 1 2 3
## 1 257 0 8
## 2 74 108 106
## 3 2 225 220
##
## $ZIFA_TC
## labels
## 1 2 3
## 1 111 53 15
## 2 221 278 4
## 3 1 2 315
##
## $ZIFA_TMM
## labels
## 1 2 3
## 1 322 7 30
## 2 6 324 1
## 3 5 2 303
##
## $ZIFA_FQ
## labels
## 1 2 3
## 1 0 16 288
## 2 198 118 34
## 3 135 199 12